
* Session setup: reset Stata, restart timer 1, open the log and load the input
* data. Relies on the globals $log and $data being defined before this do-file
* is run.
clear all
set more off
cap log close
timer clear 1
timer on 1

log using "$log/04_find_annual_dominant_job.log", replace

* Forward slash as the directory separator: Stata accepts it on every platform,
* whereas a backslash is a separator on Windows only.
use "$data/full_with_E_new_clean.dta", clear

* Drop probies if it is present, then harmonise variable names
* (this also corrects the misspelt train_lenght).
capture drop probies
rename (entry_german entry_gender tentgelt train_lenght) ///
       (german       gender       wage     train_length)

* Person-level attributes that are expected not to vary across a person's rows
local stablevars ///
	trained certificate train_end train_end_age first train_firm ///
	gender german train_occ day_entry year_entry no_trainings ///
	train_edu train_length train_start train_start_age train_start_year ///
	train_start_bula train_start_ind first_train_beruf

* Verify that each of these variables is constant within person: every row must
* equal the person-level mean. egen's mean() ignores missing values, so a
* variable that is missing on only some of a person's rows also fails the
* assertion; a variable that is missing on all of a person's rows passes.
foreach v of local stablevars {
	by persnr, sort: egen double within_mean = mean(`v')
	assert `v' == within_mean
	drop within_mean
}

* Spell-level earnings (wage x spell length). The _ft variants are defined only
* for spells with pt == 0 and are missing otherwise.
gen spell_earnings    = wage * spell_length
gen spell_earnings_ft = spell_earnings if pt == 0
gen spell_length_ft   = spell_length   if pt == 0


* Assign a single occupation code per person-establishment-year cell, for both
* beruf and beh_beruf_num: take the code recorded on the spell(s) whose length
* equals the cell maximum and spread the mode of those codes to every row of
* the cell (maxmode: among tied modes the largest code is used).
* The original values are kept as <name>_org; the counts document how many
* rows are missing at each step.
bysort persnr betnr year: egen longest_spell = max(spell_length)

foreach occ in beruf beh_beruf_num {
	rename `occ' `occ'_org
	count if missing(`occ'_org)

	* code is retained only on the longest spell(s) of the cell
	gen `occ' = `occ'_org if spell_length == longest_spell
	count if missing(`occ')

	* modal code of those spells, assigned to all rows of the cell
	bysort persnr betnr year: egen `occ'_dom = mode(`occ'), maxmode
	count if missing(`occ'_dom)
	drop `occ'
	rename `occ'_dom `occ'
}

drop longest_spell

* Aggregate to one row per person-establishment-year: means for the person-level
* attributes, pt, the occupation codes and same_train_occ; sums for spell
* lengths and spell earnings. Everything else is dropped beforehand.
local cellkeys persnr betnr year
local meanvars `stablevars' pt beruf beh_beruf_num same_train_occ
local sumvars  spell_length spell_length_ft spell_earnings spell_earnings_ft

keep `cellkeys' `meanvars' `sumvars'
compress

fcollapse (mean) `meanvars' (sum) `sumvars', by(`cellkeys') pool(15)

* Distribution of total and full-time spell length per person-establishment-year
su spell_length, de
su spell_length_ft, de

* Average wage per cell = summed spell earnings / summed spell length.
* The summed earnings are then renamed to est_earnings*.
gen wage_ft = spell_earnings_ft / spell_length_ft
gen wage    = spell_earnings    / spell_length
rename spell_earnings_ft est_earnings_ft
rename spell_earnings est_earnings

* pt is now the cell mean of the spell-level pt indicator
su pt, de
count if pt == 0 // pure full-time
count if pt > 0 & pt < 1 // full- and part-time
count if pt == 1 // pure part-time

* Annual earnings per person, summed over all establishments of the year.
* egen's total() is the documented name of the function formerly called sum();
* the result is the same.
bysort persnr year: egen double aearnings_ft = total(est_earnings_ft)
bysort persnr year: egen double aearnings    = total(est_earnings)


****************************************
* DOMINANT ESTABLISHMENT BY EARNINGS   *
****************************************
* Within each person-year, flag the establishment row(s) whose earnings equal
* the person-year maximum. Rows with zero earnings are never flagged, and ties
* flag more than one row. dom_job_ft is based on est_earnings_ft, dom_job on
* est_earnings.
foreach earn in est_earnings_ft est_earnings {
	* est_earnings_ft -> dom_job_ft, est_earnings -> dom_job
	local flag = subinstr("`earn'", "est_earnings", "dom_job", 1)

	bysort persnr year: egen double top_earn = max(`earn')
	gen `flag' = (`earn' != 0 & `earn' == top_earn)
	drop top_earn
}

tab dom_job_ft, mis
tab dom_job, mis

* Industry and region of the training establishment.
* wz_bula is keyed on betnr, so train_firm temporarily takes that name while
* the current establishment id is parked in a holding variable.
rename betnr      cur_betnr_hold
rename train_firm betnr
merge m:1 betnr using "$temp/wz_bula", keepusing(ind bula)
keep if _merge != 2      // discard establishments that appear only in wz_bula
drop _merge
rename (ind bula) (train_ind region_entry)
rename betnr          train_firm
rename cur_betnr_hold betnr

* Industry and region of the current establishment
merge m:1 betnr using "$temp/wz_bula", keepusing(ind bula)
keep if _merge != 2
drop _merge
rename (ind bula) (current_ind region_current)

* Replace both industry codes by the coarser code floor(code/10); the summaries
* show each variable before and after the conversion.
su current_ind
gen ind_2dig = floor(current_ind/10)
su ind_2dig

gen train_ind_2dig = floor(train_ind/10)
su train_ind
su train_ind_2dig

drop current_ind train_ind
rename (train_ind_2dig ind_2dig) (train_ind current_ind)

* Characteristics of the current establishment in the current year.
* NOTE(review): no keepusing() is given, so every variable stored in
* BHP_vars_year.dta is added here - confirm that is intended.
merge m:1 betnr year using "$temp/BHP_vars_year.dta"
keep if _merge != 2      // discard establishment-years that appear only in the BHP file
drop _merge
rename (az_ges       az_azubi         te_med           te_imp_med) ///
       (empl_current trainees_current med_wage_current med_imp_wage_current)

* Characteristics of the training establishment in the entry year.
* The BHP file is keyed on betnr and year, so train_firm and year_entry
* temporarily take those names while the current values are parked.
rename betnr      betnr_current
rename year       year_current
rename train_firm betnr
rename year_entry year
merge m:1 betnr year using "$temp/BHP_vars_year.dta"
keep if _merge != 2
drop _merge
rename (az_ges     az_azubi       te_med         te_imp_med) ///
       (empl_train trainees_train med_wage_train med_imp_wage_train)

* Restore the original key names
rename year          year_entry
rename betnr         train_firm
rename year_current  year
rename betnr_current betnr
		
*Working at training firm?
* NOTE(review): a missing train_firm yields 0 here rather than missing -
* confirm that this is intended.
gen work_at_train = (betnr==train_firm)

*Work in training occ?
* -1 is recoded to missing in both occupation variables before comparing.
replace beruf     = . if beruf == -1
replace train_occ = . if train_occ == -1
* The indicator is left missing when either side of the comparison is unknown.
* Previously only a missing beruf produced a missing indicator, so an unknown
* training occupation was coded as 0.
gen work_in_train_occ = (beruf==train_occ) if !missing(beruf, train_occ)
tab work_in_train_occ, mis

*Work in training industry?
* Same rule: missing when the current or the training industry is unknown.
gen work_in_train_ind = (current_ind==train_ind) if !missing(current_ind, train_ind)
tab work_in_train_ind, mis

compress

* Add the variables stored in U_internal.dta, matched on entry year and entry
* region; rows that exist only in U_internal are discarded.
* Forward slash as the directory separator: Stata accepts it on every platform,
* whereas a backslash is a separator on Windows only.
merge m:1 year_entry region_entry using "$temp/U_internal.dta"
drop if _merge==2
drop _merge

* Exclude training occupation codes 98 and 55
drop if train_occ==98 | train_occ==55

gen month_entry = month(day_entry)

* log(x + 1) of the wage variables
local vars "wage med_imp_wage_train"
foreach var in `vars' {
	gen log_`var' = ln(`var'+1)
}

* Years elapsed since the entry year
gen potential_experience = year-year_entry

compress
save "$temp/analysis_pt_new", replace

********************************************************************************
* Random subsample of persons: each person receives one integer draw from
* 1-100 and is kept if the draw is 1 or 2 (about 2% of persons). The seed is
* fixed so that the sample is reproducible.

set seed 666
* the draw is made on the first row of each person only ...
bysort persnr: gen draw = floor(uniform()*100)+1 if _n == 1
* ... and then copied to all rows of that person
by persnr: egen rand = mean(draw)
drop draw
su rand
keep if rand <= 2

compress
save "$temp/analysis_pt_2p_new", replace

clear

* Stop and report timer 1 while the log is still open, so that the run time is
* recorded in the log file; close the log afterwards.
timer off 1
timer list 1

cap log close
